{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import glob\n",
    "import os\n",
    "import xml.etree.ElementTree as ET\n",
    "from shutil import copyfile, copytree\n",
    "import os.path as osp\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pickle as pkl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"Read labels from structured folder\"\"\"\n",
    "files = glob.glob(\"../aic20_attributes/types/test/*/*.jpg\")\n",
    "img2type = {}\n",
    "for file in files:\n",
    "    tm = file.split('/')\n",
    "    file_name = int(tm[-1].split('.')[0])\n",
    "    vehtype = int(tm[-2])\n",
    "    img2type[file_name] = vehtype\n",
    "pkl.dump(img2type, open(\"../aic20_attributes/test_types.pkl\", \"wb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"Read train_label.xml and output: \n",
    "    - img2id:  dict[imageID] = vehicleID\n",
    "    - id2imgs: dict[vehicleId] = [list of imgs id]\"\"\"\n",
    "\n",
    "xml_train_lbl = ET.parse('../aic20_data/origin/train_label.xml', parser=ET.XMLParser(encoding='iso-8859-5'))\n",
    "root = xml_train_lbl.getroot()\n",
    "img2id = {}\n",
    "id2imgs= {}\n",
    "for child in root.iter(\"Item\"):\n",
    "    imgId = child.attrib[\"imageName\"].replace(\".jpg\",\"\")\n",
    "    vehId = int(child.attrib[\"vehicleID\"])\n",
    "    img2id[imgId] = vehId\n",
    "    if vehId not in id2imgs: id2imgs[vehId] = []\n",
    "    id2imgs[vehId].append(imgId)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"Generate csv of vehicle type of each training image\n",
    "    - input: annotated vehicle types with following structure: <vehi_type_id>/<vehi_id>/<vehi_tracklet>\n",
    "    - output: csv file. each row contains <img_id>, <vehi_type_id> \"\"\"\n",
    "anno_paths = glob.glob(\"../aic20_data/aic20_train_vehicle_types/*/*\")\n",
    "id2vehi_type = {}\n",
    "for annoPath in anno_paths:\n",
    "    vehType = annoPath.split('/')[-2]\n",
    "    vehId   = annoPath.split('/')[-1]\n",
    "    id2vehi_type[int(vehId)] = vehType\n",
    "\n",
    "vehi_type = [id2vehi_type[img2id[img]] for img in sorted(img2id.keys())]\n",
    "df = pd.DataFrame({\"img_id\":list(sorted(img2id.keys())), \"vehicle_type\": vehi_type}, columns= ['img_id', 'vehicle_type'])\n",
    "df.to_csv(\"../aic20_data/attributes_lbls/train_vehicle_type.csv\", index = False, header = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"Convert data split files from vltanh to my format\n",
    "    - input: csv file. with 2 columns: `camera_id` `vehicle_id` \n",
    "    - output: file name .lst\"\"\"\n",
    "inp_f = \"reid_gallery_hard.csv\"\n",
    "out_f = \"gallery_hard.lst\"\n",
    "df = pd.read_csv(inp_f)\n",
    "train_tracks2vehicle = {}\n",
    "selected_tracks = []\n",
    "for i, cam_id in enumerate(df['camera_id']):\n",
    "    selected_tracks.append(str(cam_id) + \"_\"+ str(df['vehicle_id'][i]).zfill(4))\n",
    "with open(out_f, \"w\") as fo:\n",
    "    for child in root.iter(\"Item\"):\n",
    "        name = child.attrib[\"imageName\"]\n",
    "        veh_id = child.attrib[\"vehicleID\"]\n",
    "        cam_id = child.attrib[\"cameraID\"]\n",
    "        m  = str(cam_id)+ \"_\" + str(veh_id)\n",
    "        if m in selected_tracks:\n",
    "            fo.write(name + \"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"../AIC20_track2/data/train_track_id.txt\") as fi:\n",
    "    lines = fi.readlines()\n",
    "train_tracks = {}\n",
    "for i,track in enumerate(lines):\n",
    "    train_tracks[i] = track.strip().split(' ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_tracks2id = {k:img2id[int(train_tracks[k][0])] for k in train_tracks.keys()}\n",
    "id2train_tracks = {}\n",
    "for track in train_tracks2id.keys():\n",
    "    veh_id = train_tracks2id[track]\n",
    "    if veh_id not in id2train_tracks:\n",
    "        id2train_tracks[veh_id] = []\n",
    "    id2train_tracks[veh_id].append(track)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_thumbnails = \"../AIC20_track2/data/image_train_thumbnails/\"\n",
    "path_train_vehicles   = \"../AIC20_track2/train_vehicles\"\n",
    "for veh_id in id2train_tracks.keys():\n",
    "    veh_id_str = str(veh_id).zfill(3)\n",
    "    out_path = osp.join(path_train_vehicles,veh_id_str)\n",
    "    os.makedirs(out_path, exist_ok=True)\n",
    "    for track in id2train_tracks[veh_id]:\n",
    "        track_img_name = str(track).zfill(3)+\".jpg\"\n",
    "        copyfile(osp.join(train_thumbnails,track_img_name), osp.join(out_path,track_img_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"vehicle_id_draft.csv\")\n",
    "train_tracks2vehicle = {}\n",
    "for i, track in enumerate(df['track']):\n",
    "    track = int(track)\n",
    "    veh_id = train_tracks2id[track]\n",
    "    if (pd.isnull(df['veh_type'][i])):\n",
    "        draft_lbl = -1\n",
    "    else:\n",
    "        draft_lbl = int(df['veh_type'][i])\n",
    "    if veh_id not in train_tracks2vehicle:\n",
    "        train_tracks2vehicle[veh_id] = draft_lbl\n",
    "    if (i > 1170):\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vehicle_types_fold = \"../AIC20_track2/train_vehicle_types/\"\n",
    "for veh_id in train_tracks2vehicle.keys():\n",
    "    veh_type = str(train_tracks2vehicle[veh_id])\n",
    "    veh_id = str(veh_id).zfill(3)\n",
    "    out_path = osp.join(vehicle_types_fold,veh_type)\n",
    "    os.makedirs(out_path, exist_ok=True)\n",
    "    copytree(osp.join(path_train_vehicles,veh_id), osp.join(out_path,veh_id))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(len(train_tracks2vehicle))\n",
    "print(len(id2train_tracks))\n",
    "[t for t in id2train_tracks.keys() if t not in train_tracks2vehicle.keys()]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
